NOTE: from level 4/5 onwards I will execute most steps locally, as the datasets are not as large anymore.
Here, we will work on the level 5 of the CD4 T cell, which entails:
library(Seurat)
library(SeuratWrappers)
library(harmony)
library(tidyverse)
# Paths
path_to_obj <- here::here("scRNA-seq/results/R_objects/level_4/CD4_T/CD4_T_integrated_level_4.rds")
# path_to_obj <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/results/R_objects/level_4/CD4_T/CD4_T_integrated_level_4.rds"
path_to_save <- here::here("scRNA-seq/results/R_objects/level_5/CD4_T/paper/CD4_T_subseted_annotated_level_5.rds")
# path_to_save <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/results/R_objects/level_5/CD4_T/paper/CD4_T_subseted_annotated_level_5.rds"
# path_to_save_th <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/results/R_objects/level_5/CD4_T/paper/CD4_T_Th_level_5_annotated.rds"
path_to_save_th <- here::here("scRNA-seq/results/R_objects/level_5/CD4_T/paper/CD4_T_Th_level_5_annotated.rds")
# Colors
color_palette <- c("#1CFFCE", "#90AD1C", "#C075A6", "#85660D", "#5A5156", "#AA0DFE",
"#F8A19F", "#F7E1A0", "#1C8356", "#FEAF16", "#822E1C", "#C4451C",
"#1CBE4F", "#325A9B", "#F6222E", "#FE00FA", "#FBE426", "#16FF32",
"black", "#3283FE", "#B00068", "#DEA0FD", "#B10DA1", "#E4E1E3",
"#90AD1C", "#FE00FA", "#85660D", "#3B00FB", "#822E1C", "coral2",
"#1CFFCE", "#1CBE4F", "#3283FE", "#FBE426", "#F7E1A0", "#325A9B",
"#2ED9FF", "#B5EFB5", "#5A5156", "#DEA0FD", "#FEAF16", "#683B79",
"#B10DA1", "#1C7F93", "#F8A19F", "dark orange", "#FEAF16", "#FBE426",
"Brown")
# Functions
source(here::here("scRNA-seq/bin/utils.R"))
# source("~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/bin/utils.R")
# source("~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/bin/SLOcatoR_functions.R")
source(here::here("scRNA-seq/bin/SLOcatoR_functions.R"))
# Additional function
plot_subcluster <- function(seurat_obj, pattern) {
p <- seurat_obj@reductions$umap@cell.embeddings %>%
as.data.frame() %>%
mutate(cluster = seurat_obj$annotation_level_3) %>%
filter(str_detect(cluster, pattern)) %>%
ggplot(aes(UMAP_1, UMAP_2, color = cluster)) +
geom_point(size = 0.1) +
theme_classic()
p
}
# Seurat object
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat
## 37378 features across 65461 samples within 1 assay
## Active assay: RNA (37378 features, 0 variable features)
## 3 dimensional reductions calculated: pca, umap, harmony
DimPlot(seurat, cols = color_palette, pt.size = 0.2)
seurat <- FindSubCluster(
seurat,
cluster = "Follicular Th CXCL13+CBLB+",
subcluster.name = "annotation_level_5",
resolution = 0.1,
graph.name = "RNA_snn"
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 5769
## Number of edges: 106629
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9105
## Number of communities: 36
## Elapsed time: 0 seconds
DimPlot(seurat, group.by = "annotation_level_5", cols = color_palette)
Idents(seurat) <- "annotation_level_5"
clusters_interest <- seurat$annotation_level_5 %>%
unique() %>%
str_subset("^Follicular Th CXCL13") %>%
sort()
markers <- purrr::map(clusters_interest, function(x) {
group_1 <- clusters_interest[which(clusters_interest == x)]
group_2 <- clusters_interest[which(clusters_interest != x)]
print(group_1)
print(group_2)
df <- FindMarkers(
seurat,
ident.1 = group_1,
ident.2 = group_2,
only.pos = TRUE,
logfc.threshold = 0.5,
verbose = TRUE
)
df <- df %>%
rownames_to_column(var = "gene") %>%
arrange(desc(avg_log2FC))
df
})
## [1] "Follicular Th CXCL13+CBLB+_0"
## [1] "Follicular Th CXCL13+CBLB+_1" "Follicular Th CXCL13+CBLB+_2"
## [1] "Follicular Th CXCL13+CBLB+_1"
## [1] "Follicular Th CXCL13+CBLB+_0" "Follicular Th CXCL13+CBLB+_2"
## [1] "Follicular Th CXCL13+CBLB+_2"
## [1] "Follicular Th CXCL13+CBLB+_0" "Follicular Th CXCL13+CBLB+_1"
names(markers) <- clusters_interest
# path_save_markers <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/markers_Tfh_CXCL13_new.xlsx"
path_save_markers <- here::here("scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/markers_Tfh_CXCL13_new.xlsx")
openxlsx::write.xlsx(x = markers, file = path_save_markers, overwrite = TRUE)
clusters_to_exclude <- c("activated CD4 T", "Mitochondrial+ T cells",
"metabolic/poor-quality")
mask <- !(Idents(seurat) %in% clusters_to_exclude)
table(mask)
## mask
## FALSE TRUE
## 8667 56794
selected_cells <- colnames(seurat)[mask]
seurat <- subset(seurat, cells = selected_cells)
Let us exclude all the cells from Royal London (discussed elsewhere):
seurat <- subset(seurat, hospital != "Royal London")
hvg <- find_assay_specific_features(seurat)
seurat <- integrate_assays(
seurat,
assay_specific = TRUE,
shared_hvg = hvg,
assay_var = "assay",
n_dim = 30
)
seurat <- RunUMAP(seurat, reduction = "harmony", dims = 1:30)
DimPlot(seurat, cols = color_palette)
clusters_interest_2 <- "Th17"
th <- subset(seurat, idents = clusters_interest_2)
th <- subset(th, assay == "3P")
th <- th %>%
FindVariableFeatures() %>%
ScaleData() %>%
RunPCA() %>%
RunHarmony(group.by.vars = "donor_id", reduction = "pca", dims = 1:30) %>%
RunUMAP(reduction = "harmony", dims = 1:30)
markers_interest <- c("IFNG", "GATA3", "IL26", "IL21", "CCL3", "IL4")
feat_plots <- purrr::map(markers_interest, function(x) {
p <- FeaturePlot(th, features = x) +
scale_color_viridis_c(option = "magma")
p
})
feat_plots
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
DimPlot(th)
# Cluster
th <- FindNeighbors(th, dims = 1:30, reduction = "harmony")
th <- FindClusters(th, resolution = 0.65)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1091
## Number of edges: 52799
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6121
## Number of communities: 5
## Elapsed time: 0 seconds
DimPlot(th)
# Markers
markers_th <- FindAllMarkers(th, only.pos = TRUE, logfc.threshold = 0.5)
All CD4 T cells:
seurat$annotation_paper <- case_when(
seurat$annotation_level_5 == "Naive" ~ "Naive",
seurat$annotation_level_5 == "Central Mem PASK+" ~ "CM Pre-non-Tfh",
seurat$annotation_level_5 == "Central Mem PASK-" ~ "CM PreTfh",
seurat$annotation_level_5 == "Memory T cells" ~ "T-Trans-Mem",
seurat$annotation_level_5 == "Follicular Th CXCL13+CBLB+_1" ~ "T-Eff-Mem",
seurat$annotation_level_5 == "Th17" ~ "T-helper",
seurat$annotation_level_5 == "Follicular Th CXCL13+CBLB+_2" ~ "Tfh T:B border",
seurat$annotation_level_5 == "Follicular Th CXCL13+CBLB+_0" ~ "Tfh-LZ-GC",
seurat$annotation_level_5 == "Follicular Th TOX2+" ~ "Tfh-LZ-GC",
seurat$annotation_level_5 == "Follicular Th CXCR5+" ~ "GC-Tfh-SAP",
seurat$annotation_level_5 == "CD200+TOX+" ~ "GC-Tfh-0X40",
seurat$annotation_level_5 == "CD4 Non-Tfh KLRB1+ " ~ "Tfh-Mem",
seurat$annotation_level_5 == "IL2RA+FOXP3+ Treg" ~ "Eff-Tregs",
seurat$annotation_level_5 == "Treg IKZF2+HPGD+" ~ "non-GC-Tf-regs",
seurat$annotation_level_5 == "naive Treg IKZF2+" ~ "GC-Tf-regs",
)
ordered_levels <- c("Naive", "CM Pre-non-Tfh", "CM PreTfh", "T-Trans-Mem",
"T-Eff-Mem", "T-helper", "Tfh T:B border", "Tfh-LZ-GC",
"GC-Tfh-SAP", "GC-Tfh-0X40", "Tfh-Mem", "Eff-Tregs",
"non-GC-Tf-regs", "GC-Tf-regs")
seurat$annotation_paper <- factor(
seurat$annotation_paper,
levels = ordered_levels
)
Idents(seurat) <- "annotation_paper"
T helper subsets: since we found a cluster of Th22 cells, we will recluster to find it.
th <- FindSubCluster(
th,
cluster = "3",
graph.name = "RNA_snn",
subcluster.name = "Th22",
resolution = 0.25
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 199
## Number of edges: 6063
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7688
## Number of communities: 2
## Elapsed time: 0 seconds
th$annotation_paper <- case_when(
th$Th22 == "0" ~ "Th2",
th$Th22 == "1" ~ "Th0",
th$Th22 == "2" ~ "Th17",
th$Th22 == "3_0" ~ "Th1",
th$Th22 == "3_1" ~ "Th22",
th$Th22 == "4" ~ "Th1/Th17"
)
ordered_levels_th <- c("Th0", "Th1", "Th2", "Th17", "Th1/Th17", "Th22")
th$annotation_paper <- factor(
th$annotation_paper,
levels = ordered_levels_th
)
Idents(th) <- "annotation_paper"
DimPlot(th, cols = color_palette)
Find all markers
# All CD4 T cells
markers_all_cd4 <- FindAllMarkers(seurat, only.pos = TRUE, logfc.threshold = 0.6)
markers_all_cd4 <- markers_all_cd4 %>%
group_by(cluster) %>%
arrange(desc(avg_log2FC), group_by = TRUE) %>%
ungroup()
markers_all_dfs <- purrr::map(unique(markers_all_cd4$cluster), function(x) {
df <- markers_all_cd4[markers_all_cd4$cluster == x, ]
df <- df[, c(7, 1:6)]
df
})
names(markers_all_dfs) <- unique(markers_all_cd4$cluster)
markers_all_dfs <- markers_all_dfs[levels(seurat$annotation_paper)]
# Th cells
markers_th <- FindAllMarkers(th, only.pos = TRUE, logfc.threshold = 0.45)
markers_th <- markers_th %>%
group_by(cluster) %>%
arrange(desc(avg_log2FC), group_by = TRUE) %>%
ungroup()
markers_th_dfs <- purrr::map(unique(markers_th$cluster), function(x) {
df <- markers_th[markers_th$cluster == x, ]
df <- df[, c(7, 1:6)]
df
})
names(markers_th_dfs) <- unique(markers_th$cluster)
markers_th_dfs <- markers_th_dfs[levels(th$annotation_paper)]
names(markers_th_dfs)[names(markers_th_dfs) == "Th1/Th17"] <- "Th1_Th17"
# Save Seurat objects
saveRDS(seurat, path_to_save)
saveRDS(th, path_to_save_th)
# Save markers
# path_save_markers <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/CD4_T_markers_annotated_level_5.xlsx"
names(markers_all_dfs)[names(markers_all_dfs) == "Tfh T:B border"] <- "Tfh T-B border"
path_save_markers <- here::here("scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/CD4_T_markers_annotated_level_5.xlsx")
openxlsx::write.xlsx(
x = markers_all_dfs,
file = path_save_markers,
overwrite = TRUE
)
# path_to_save_markers_th <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/markers_Th_subsets_annotated.xlsx"
path_to_save_markers_th <- here::here("scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/markers_Th_subsets_annotated.xlsx")
openxlsx::write.xlsx(
x = markers_th_dfs,
file = path_to_save_markers_th,
overwrite = TRUE
)
# Save UMAP
umap_level_5 <- DimPlot(seurat, cols = color_palette)
# path_to_save_umap1 <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/CD4_T_umap_level_5_2_annotated.png"
path_to_save_umap1 <- here::here("scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/CD4_T_umap_level_5_2_annotated.png")
ggsave(
filename = path_to_save_umap1,
plot = umap_level_5,
width = 14,
height = 12,
units = "cm"
)
umap_level_5_th <- DimPlot(th, cols = color_palette)
# path_to_save_umap2 <- "~/Desktop/CNAG/mnt_clust/tonsil_atlas/current/scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/CD4_Th_umap_annotated.png"
path_to_save_umap2 <- here::here("scRNA-seq/3-clustering/5-level_5/CD4_T/tmp/CD4_Th_umap_annotated.png")
ggsave(
filename = path_to_save_umap2,
plot = umap_level_5_th,
width = 14,
height = 12,
units = "cm"
)
sessionInfo()
## R version 4.0.1 (2020-06-06)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux Server release 6.7 (Santiago)
##
## Matrix products: default
## BLAS: /apps/R/4.0.1/lib64/R/lib/libRblas.so
## LAPACK: /apps/R/4.0.1/lib64/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=C LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.4 purrr_0.3.4 readr_1.4.0 tidyr_1.1.2 tibble_3.0.6 ggplot2_3.3.3 tidyverse_1.3.0 harmony_0.1.0 Rcpp_1.0.6 SeuratWrappers_0.3.0 SeuratObject_4.0.0 Seurat_4.0.0 BiocStyle_2.16.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 plyr_1.8.6 igraph_1.2.6 lazyeval_0.2.2 splines_4.0.1 listenv_0.8.0 scattermore_0.7 digest_0.6.27 htmltools_0.5.1.1 fansi_0.4.2 magrittr_2.0.1 tensor_1.5 cluster_2.1.0 ROCR_1.0-11 openxlsx_4.2.4 remotes_2.1.1 globals_0.14.0 modelr_0.1.8 matrixStats_0.58.0 colorspace_2.0-0 rvest_0.3.6 ggrepel_0.9.1 haven_2.3.1 xfun_0.21 crayon_1.4.1 jsonlite_1.7.2 spatstat_1.64-1 spatstat.data_2.0-0 survival_3.2-3 zoo_1.8-8 glue_1.4.2 polyclip_1.10-0 gtable_0.3.0 leiden_0.3.7 future.apply_1.7.0 abind_1.4-5 scales_1.1.1 DBI_1.1.1 miniUI_0.1.1.1 viridisLite_0.3.0 xtable_1.8-4 reticulate_1.18 rsvd_1.0.3 htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2 ellipsis_0.3.1 ica_1.0-2 farver_2.0.3 pkgconfig_2.0.3 sass_0.3.1 uwot_0.1.10 dbplyr_2.1.0 deldir_0.2-10 here_1.0.1
## [57] utf8_1.1.4 labeling_0.4.2 tidyselect_1.1.0 rlang_0.4.10 reshape2_1.4.4 later_1.1.0.1 munsell_0.5.0 cellranger_1.1.0 tools_4.0.1 cli_2.3.1 generics_0.1.0 broom_0.7.4 ggridges_0.5.3 evaluate_0.14 fastmap_1.1.0 yaml_2.2.1 goftest_1.2-2 knitr_1.31 fs_1.5.0 fitdistrplus_1.1-3 zip_2.2.0 RANN_2.6.1 pbapply_1.4-3 future_1.21.0 nlme_3.1-148 mime_0.10 xml2_1.3.2 compiler_4.0.1 rstudioapi_0.13 plotly_4.9.3 png_0.1-7 spatstat.utils_2.0-0 reprex_1.0.0 bslib_0.2.4 stringi_1.5.3 highr_0.8 ps_1.5.0 RSpectra_0.16-0 lattice_0.20-41 Matrix_1.2-18 vctrs_0.3.6 pillar_1.5.0 lifecycle_1.0.0 BiocManager_1.30.10 lmtest_0.9-38 jquerylib_0.1.3 RcppAnnoy_0.0.18 data.table_1.14.0 cowplot_1.1.1 irlba_2.3.3 httpuv_1.5.5 patchwork_1.1.1 R6_2.5.0 bookdown_0.21 promises_1.2.0.1 KernSmooth_2.23-17
## [113] gridExtra_2.3 parallelly_1.23.0 codetools_0.2-16 MASS_7.3-51.6 assertthat_0.2.1 rprojroot_2.0.2 withr_2.4.1 sctransform_0.3.2 mgcv_1.8-31 parallel_4.0.1 hms_1.0.0 grid_4.0.1 rpart_4.1-15 rmarkdown_2.7 Rtsne_0.15 shiny_1.6.0 lubridate_1.7.9.2